# ### Replication Data for "Longer Trips to Court Cause Evictions" by Hoffman and Strezhnev
# ### Appendix 9 - Harris County

# ## Libraries 
library(tidyverse)
library(haven)
library(geosphere)
library(lubridate)
library(sf)
library(jsonlite)
library(ggstar)
library(viridis)
library(estimatr)
library(fixest)

# Binned scatterplots
source("stat_binscatter.R")

######################################################
## Load the combined Harris data

evictions <- read_csv("Data/harris_data_merged.csv")

#################################
### Houston Geography
###################################

#get width given height
wd_hex <- function(height){
  tri_side <- height/2
  sma_side <- height/4
  width <- 2*sqrt(tri_side^2 - sma_side^2)
  return(width)
}

#now to figure out the area if you want
#side is simply height/2 in geom_hex
hex_area <- function(side){
  area <- 6 * (  (sqrt(3)*side^2)/4 )
  return(area)
}

#So if you want your hexagon to have a regular area need the inverse function
#Gives height and width if you want a specific area
hex_dim <- function(area){
  num <- 4*area
  den <- 6*sqrt(3)
  vert <- 2*sqrt(num/den)
  horz <- wd_hex(vert)
  return(c(vert,horz))
}
## Truncated mean function - returns mean if vector length is above minBin otherwise NA
mean_trunc <- function(x, minBin=25){
  if (length(x) >= minBin){
    return(mean(x))
  }else{
    return(NA)
  }
}

mapTheme <- function(base_size = 12) {
  theme(
    text = element_text( color = "black"),
    plot.title = element_text(size = 14,colour = "black"),
    plot.subtitle=element_text(face="italic"),
    plot.caption=element_text(hjust=0),
    axis.ticks = element_blank(),
    panel.background = element_blank(),axis.title = element_blank(),
    axis.text = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank()
    #panel.border = element_rect(colour = "black", fill=NA, size=2)
  )
}

# Merge to Harris county shapefile

# Tract Map
houstonTractMap <- read_sf(dsn="Data/Houston Tracts", layer="2010_Census Tracts") %>% filter(FIPS == 201)
houstonTractMap <- st_transform(houstonTractMap, crs="WGS84")
houstonTractMap$TRACT10 <- paste(houstonTractMap$STATE, houstonTractMap$FIPS, houstonTractMap$TRT, sep="")

# Block Map
houstonBlockMap <- read_sf(dsn="Data/Houston Blocks", layer="2010_Census Blocks") %>% filter(CNTY == 201)
houstonBlockMap <- st_transform(houstonBlockMap, crs="WGS84")
houstonBlockMap <- st_make_valid(houstonBlockMap)

# Precinct Map
houstonPrecinctMap <- read_sf(dsn="Data/Houston Precincts", layer="Harris_County_Constable_Precincts")

# Join on Tract
evictions_join <- st_as_sf(evictions, coords=c("google_longitude", "google_latitude"), crs="WGS84")
evictions_join <- st_join(evictions_join, houstonTractMap)
evictions$TRACT10 <- evictions_join$TRACT10

# Drop all where we couldn't merge to tract
evictions <- evictions %>% filter(!is.na(TRACT10))
nrow(evictions)

# Join on Block
evictions_join <- st_as_sf(evictions, coords=c("google_longitude", "google_latitude"), crs="WGS84")
evictions_join <- st_join(evictions_join, houstonBlockMap)

# Drop duplicates - likely on block borders
evictions_join <- evictions_join %>% filter(!duplicated(xcasenum))
evictions$BLOCKID <- evictions_join$SCTBKEY

## Concatenate block ID
evictions$BLOCKID_FULL <- paste("1000000US", evictions$BLOCKID, sep="")
evictions$TRACT10_FULL <- paste("1400000US", evictions$TRACT10, sep="")

# Join on Harris County Judicial Precinct
evictions_join <- st_as_sf(evictions, coords=c("google_longitude", "google_latitude"), crs="WGS84")
evictions_join <- st_join(evictions_join, houstonPrecinctMap)

evictions$PRECINCT <- as.numeric(str_extract(evictions_join$PRECINCT, "[0-9]"))

# How many are in the correct precinct
mean(evictions$PRECINCT == evictions$`Courthouse Precinct`)

# Drop if not in correct precinct
evictions <- evictions %>% mutate(inPrecinct = case_when(PRECINCT == `Courthouse Precinct` ~ 1,
                                                         TRUE ~ 0))

evictions <- evictions %>% filter(inPrecinct == 1)

# Check sample sizes + number of public housing evictions
nrow(evictions)
table(evictions$publichousing)

#########################################################
### Parse the outcome

# Make judgment adn disposition dates numeric
evictions$dispositiondate <- mdy(evictions$dispositiondate)

# Recode everyting to just do OCA
evictions <- evictions %>% mutate(dispositionOutcome = case_when(is.na(dispositiondesc) ~ NA_character_,
                                                                 dispositiondesc == "DEFAULT JUDGMENT (OCA)"&xoutcome == "FOR PLAINTIFF" ~ "For Plaintiff by Default",
                                                                 dispositiondesc == "DEFAULT JUDGMENT (OCA)"&xoutcome == "FOR DEFENDANT" ~ "For Defendant by Default",
                                                                 dispositiondesc == "DEFAULT JUDGMENT (OCA)"&xoutcome == "DISMISSED" ~ "Dismissed",
                                                                 dispositiondesc == "TRIAL OR HEARING BY JUDGE (OCA)"&xoutcome == "FOR PLAINTIFF" ~ "For Plaintiff",
                                                                 dispositiondesc == "TRIAL OR HEARING BY JUDGE (OCA)"&xoutcome == "FOR DEFENDANT" ~ "For Defendant",
                                                                 dispositiondesc == "TRIAL OR HEARING BY JUDGE (OCA)"&xoutcome == "DISMISSED" ~ "Dismissed",
                                                                 dispositiondesc == "TRIAL BY JURY (OCA)"&xoutcome == "FOR PLAINTIFF" ~ "For Plaintiff",
                                                                 dispositiondesc == "TRIAL BY JURY (OCA)"&xoutcome == "FOR DEFENDANT" ~ "For Defendant",
                                                                 dispositiondesc == "TRIAL BY JURY (OCA)"&xoutcome == "DISMISSED" ~ "Dismissed",
                                                                 dispositiondesc == "NON-SUITED OR DISMISSED BY PLAINTIFF (OCA)" ~ "Dismissed",
                                                                 dispositiondesc == "DISMISSED FOR WANT OF PROSECUTION (OCA)" ~ "Dismissed",
                                                                 dispositiondesc == "DISP AT TRIAL-DISMISSED BY PROSECUTION (OCA)" ~ "Dismissed",
                                                                 dispositiondesc == "AGREED JUDGMENT (OCA)" ~ "Judgment by Agreement",
                                                                 dispositiondesc == "APPEAL FILED" ~ "Appeal in progress",
                                                                 TRUE ~ "Other"))




# Figure out the outcomes
evictions <- evictions %>% mutate(plaintiffWinDefault = as.integer(dispositionOutcome == "For Plaintiff by Default"))


### Subset down to non-missing + have outcome
evictions_final <- evictions %>% filter(search_status == "OK"&inPrecinct == 1&driving_duration < 200*60&!is.na(dispositiondesc))

nrow(evictions_final)
table(evictions_final$publichousing)

## Crow flies
evictions_final$distToCourt <- distHaversine(p1 = cbind(evictions_final$google_longitude, evictions_final$google_latitude), 
                           p2 = cbind(evictions_final$`Courthouse Longitude`, evictions_final$`Courthouse Latitude`))

evictions_final$distToCourtKM <- evictions_final$distToCourt/1000

evictions_final$distRing <- floor(evictions_final$distToCourtKM)
evictions_final$distRing2k <- floor(evictions_final$distToCourtKM/2)
evictions_final$distRing5k <- floor(evictions_final$distToCourtKM/5)

# Commuting time
evictions_final$driving_duration_min <- evictions_final$driving_duration/60
evictions_final$driving_duration_10min <- evictions_final$driving_duration_min/10
evictions_final$driving_duration_hour <- evictions_final$driving_duration_min/60


#####
# Plot outcome distribution
evictions_final  %>%
  count(dispositionOutcome = factor(dispositionOutcome)) %>% 
  mutate(pct = prop.table(n)) %>% 
  ggplot(aes(x = fct_reorder(dispositionOutcome,-pct), y = pct, label = scales::percent(pct))) + 
  geom_col(position = 'dodge') + 
  geom_text(position = position_dodge(width = .9),    # move to center of bars
            vjust = -0.5,    # nudge above top of bar
            size = 3) + 
  scale_y_continuous(labels = scales::percent) + theme_bw() + xlab("") + 
  ylab("Percentage of judgments") + theme(axis.text.x = element_text(angle = 45, vjust =1, hjust=1))

ggsave("Output/Figures/houston_eviction_outcomes_summary.pdf", width=5, height=5)

# Plot over time
evictions_final %>% filter(publichousing==0) %>% ggplot(aes(x=casefiledate, y=plaintiffWinDefault)) + geom_smooth(se=F, span=1) + stat_binscatter() +
  theme_bw() + xlab("Date of filing") + ylab("Share of cases with default judgments\nagainst the defendant") + scale_colour_manual(values=c("indianred", "dodgerblue")) +
  theme(legend.position="bottom", legend.box = "horizontal") + guides(color = guide_legend(title.position="top", title.hjust = 0.5))

ggsave("Output/Figures/houston_default_rate_time.pdf", width=5, height=4)

#####
## Read in ACS data

### Census block-level racial demographics
census_race <- read_csv("Data/ACS/Block/2010_houston_block_race.csv")
census_race <- census_race %>% select(BLOCKID_FULL  = GEO_ID, totalpopBlock = P001001, totalwhiteBlock = P001003, totalblackBlock = P001004)

evictions_final <- merge(evictions_final, census_race, by="BLOCKID_FULL", all.x=T)
evictions_final$shareWhiteBlock = evictions_final$totalwhiteBlock/evictions_final$totalpopBlock
evictions_final$shareBlackBlock = evictions_final$totalblackBlock/evictions_final$totalpopBlock

### Census Block Hispanic
census_hispanic <- read_csv("Data/ACS/Block/2010_houston_block_hispanic.csv")
census_hispanic <- census_hispanic %>% select(BLOCKID_FULL = GEO_ID, totalpopBlockHispanic = P002001, totalHispanicBlock = P002002)

evictions_final <- merge(evictions_final, census_hispanic, by="BLOCKID_FULL", all.x=T)
evictions_final$shareHispanicBlock = evictions_final$totalHispanicBlock/evictions_final$totalpopBlockHispanic

mean(is.na(evictions_final$shareHispanicBlock))
mean(is.na(evictions_final$shareWhiteBlock))

### ACS Income
acs_income <- read_csv("Data/ACS/Tract/acs_houston_income.csv", na = c("", "NA", "-", "null"))

## Median household income
acs_household_income <- acs_income %>% select(TRACT10_FULL=GEO_ID, medianIncome=S1901_C01_012E)
## 250k+
acs_household_income$medianIncome[acs_household_income$medianIncome == "250,000+"] <- "250000"
acs_household_income$medianIncome <- as.numeric(acs_household_income$medianIncome)

## Merge
evictions_final <- merge(evictions_final, acs_household_income, by="TRACT10_FULL", all.x=T)

### ACS Housing value
acs_housing <- read_csv("Data/ACS/Tract/acs_houston_housing.csv",na = c("", "NA", "-", "null"))

## Additional housing covariates (median Gross Rent + number of renter occupied units)
acs_housing_info <- acs_housing %>% select(TRACT10_FULL=GEO_ID, medianGrossRent =DP04_0134E, numRenterOccupied = DP04_0047E)
evictions_final <- merge(evictions_final, acs_housing_info, by="TRACT10_FULL", all.x=T)

### ACS 
acs_contract <- read_csv("Data/ACS/Tract/acs_houston_contractRent.csv",na = c("", "NA", "-", "null"))

## Grab median household income
acs_contract_info <- acs_contract%>% select(TRACT10_FULL=GEO_ID, medianContractRent =B25058_001E)
## Merge
evictions_final <- merge(evictions_final, acs_contract_info, by="TRACT10_FULL", all.x=T)

### ACS Race
acs_race <- read_csv("Data/ACS/Tract/acs_houston_race.csv",na = c("", "NA", "-", "null"))

## Grab median household income
acs_race_subset <- acs_race %>% select(TRACT10_FULL=GEO_ID, totalPopulation = DP05_0001E,  whitepct = DP05_0032PE, blackpct = DP05_0033E, hispanicpct = DP05_0065E)

## Merge
evictions_final <- merge(evictions_final, acs_race_subset, by="TRACT10_FULL", all.x=T)


##################################################
## Block fixed effects
evictions_final_complete <- evictions_final %>% filter(!is.na(driving_duration_min)&!is.na(plaintiffWinDefault)&publichousing==0&!is.na(medianIncome)&!is.na(medianContractRent)&!is.na(shareWhiteBlock)&!is.na(shareHispanicBlock))
evictions_final_complete$BLOCKIDFACTOR <- as.factor(evictions_final_complete$BLOCKID)

evictions_final_complete$filemonthyr <- paste(month(evictions_final_complete$casefiledate), year(evictions_final_complete$casefiledate), sep="-")
evictions_final_complete$fileyr <- year(evictions_final_complete$casefiledate)


##############################################
## Simple bivariate regression

### Get colors from colorbrewer
palette_use <- RColorBrewer::brewer.pal(4, "Set1")

### Sequential Map by 
evictions_final_complete %>% ggplot(aes(y= plaintiffWinDefault, x = driving_duration_min, colour = palette_use[2])) + scale_colour_identity() +
  geom_smooth(method = "lm_robust", lwd = 2, method.args = list(cluster = evictions_final_complete$Match_addr, se_type = "CR0")) + stat_binscatter(scalefactor=1e-4, scalepoints = F) + coord_cartesian(ylim=c(.3, .55)) +
  xlab("Driving commuting time (minutes)") + ylab("Probability of Plaintiff Default")  + theme_bw()

ggsave("Output/Figures/simple_bivariate_reg_houston.pdf", width=5, height=4)

## Adjust for court number FEs
simple_court_fe <- feols(plaintiffWinDefault ~ driving_duration_min | court_number, data=evictions_final_complete, cluster = ~Match_addr, demeaned = T)

evictions_final_complete$driving_duration_min_excess <- simple_court_fe$X_demeaned[,1]
evictions_final_complete$plaintiffWinDefault_excess <- simple_court_fe$y_demeaned

evictions_final_complete %>% ggplot(aes(y= plaintiffWinDefault_excess, x = driving_duration_min_excess, colour = palette_use[2]))  + geom_hline(yintercept=0, lty=2) + geom_vline(xintercept=0, lty=2) + scale_colour_identity() +
  geom_smooth(method = "lm_robust", lwd = 2, method.args = list(cluster = evictions_final_complete$Match_addr, se_type = "CR0")) + stat_binscatter(scalefactor=1e-4, scalepoints = F) +
  xlab("Driving commuting time (de-meaned by courthouse, minutes)") + ylab("Probability of Plaintiff Default (de-meaned by courthouse)")  + theme_bw() + ggtitle("Courthouse fixed effects")

ggsave("Output/Figures/simple_bivariate_reg_houston_courtFEs.pdf", width=5, height=4)

##################
## Regressions from the main model

# No covariate adjustment
reg_0  <- feols(plaintiffWinDefault ~ driving_duration_10min, data=evictions_final_complete, cluster = ~Match_addr, demeaned = T)

# Covariates
reg_1 <- feols(plaintiffWinDefault ~ driving_duration_10min + log(medianIncome) + log(medianContractRent) +
                         shareWhiteBlock + I(shareWhiteBlock^2)   + shareHispanicBlock + I(shareHispanicBlock^2) | court_number^filemonthyr, data=evictions_final_complete, cluster = ~Match_addr, demeaned = T)


# Block-month FEs
reg_2 <- feols(plaintiffWinDefault ~ driving_duration_10min | BLOCKIDFACTOR^filemonthyr, data=evictions_final_complete, cluster = ~Match_addr, demeaned = T)

# Building - month FEs
reg_3 <- feols(plaintiffWinDefault ~ driving_duration_10min | Match_addr^filemonthyr, data=evictions_final_complete, cluster = ~Match_addr, demeaned = T)

# Combine into a figure
results_plot_main <- bind_rows(
  tidy(reg_0) %>% filter(term == "driving_duration_10min") %>% mutate(sample = "Main Analysis: Harris County: 2000-2018", model = "No Controls", nObs = nrow(evictions_final_complete), 
                                                                                 nBuilding = length(unique(evictions_final_complete$Match_addr))),
  tidy(reg_1) %>% filter(term == "driving_duration_10min") %>% mutate(sample ="Main Analysis: Harris County: 2000-2018", model = "Controls +\nCourthouse x Month-Year FE", nObs = nrow(evictions_final_complete), 
                                                                            nBuilding = length(unique(evictions_final_complete$Match_addr))),
  tidy(reg_2) %>% filter(term == "driving_duration_10min") %>% mutate(sample = "Main Analysis: Harris County: 2000-2018", model = "Census Block x Month-Year FE", nObs = nrow(evictions_final_complete), 
                                                                             nBuilding = length(unique(evictions_final_complete$Match_addr))),
  tidy(reg_3) %>% filter(term == "driving_duration_10min") %>% mutate(sample = "Main Analysis: Harris County: 2000-2018", model = "Building x Month-Year FE", nObs = nrow(evictions_final_complete), 
                                                                                 nBuilding = length(unique(evictions_final_complete$Match_addr))))

results_plot_main$conf.low = results_plot_main$estimate - qnorm(.975)*results_plot_main$std.error
results_plot_main$conf.high = results_plot_main$estimate + qnorm(.975)*results_plot_main$std.error

results_plot_main$sample_full <- paste(results_plot_main$sample, "\nN Evictions = ", results_plot_main$nObs, ", N Buildings = ", results_plot_main$nBuilding, sep="")

results_plot_main$model <- factor(results_plot_main$model, levels=rev(c("No Controls", "Controls +\nCourthouse x Month-Year FE", "Census Block x Month-Year FE",  "Building x Month-Year FE")))
results_plot_main$color <- ifelse(results_plot_main$p.value < .05, "black", "grey50")

main_plot <- results_plot_main %>% ggplot(aes(x=estimate, xmin=conf.low, xmax=conf.high, y=model, colour = color))  + geom_vline(xintercept = 0, lty=2)  + scale_colour_identity() +
  geom_pointrange() + facet_wrap(~sample_full, scales="free", ncol=1) + theme_bw() + theme(axis.text = element_text(size = 6)) + theme(axis.text.y=element_text(vjust=.8, hjust=0, size=10)) +
  xlab("Estimated effect of a 10 minute increase in driving\ntime on probability of default") + ylab("")

ggsave("Output/Figures/main_results_houston_presentation.pdf", plot = main_plot, width=6, height=4)


## Counterfactual prediction task (what if everyone had a 5 minute drive)
evictions_final_complete$driving_duration_10min_min = sapply(evictions_final_complete$driving_duration_10min, function (x) min(x, .5))

evictions_final_complete$driving_duration_10min_chg = evictions_final_complete$driving_duration_10min_min - evictions_final_complete$driving_duration_10min

evictions_final_complete$effect_transit = coef(reg_1)[1]*evictions_final_complete$driving_duration_10min_chg
evictions_final_complete$effect_transit2 = coef(reg_3)[1]*evictions_final_complete$driving_duration_10min_chg

# Effect in the covariate-adjusted model
mean(evictions_final_complete$effect_transit)
sum(evictions_final_complete$effect_transit)

# Effect from the building-FE model
mean(evictions_final_complete$effect_transit2)
sum(evictions_final_complete$effect_transit2)


## Generate the regression tables
library(texreg)
texreg(list(extract(reg_0, include.proj.stats = F),extract(reg_1, include.proj.stats=F),extract(reg_2, include.proj.stats=F), extract(reg_3, include.proj.stats=F)), stars=c(.01, .05), digits = 4, include.ci=F)

